Decision Tree Code


Decision Tree Algorithm Python Code

Data Preparation Code Python Code

The Train Dataset Train Set

The Test Dataset Test Set

Decision Tree Algorithm Python

import pandas as pd
from sklearn import tree
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import graphviz

# Load the train and test sets from their CSV files.
playerTrainDF = pd.read_csv("DTTrainDF.csv")
playerTestDF = pd.read_csv("DTTestDF.csv")

# Pull the target column out of each set, then strip it from the features.
# Note that playerTestDF is rebound to its label-free version.
playerTrainLabel = playerTrainDF['performance']
playerTestLabel = playerTestDF['performance']
playerTrainDF_nolabel = playerTrainDF.drop(columns=['performance'])
playerTestDF = playerTestDF.drop(columns=['performance'])

# These columns are removed from both feature sets before model fitting
# (presumably the non-numeric identifier columns -- confirm against the CSVs).
dropcols = ['season','teamName','playerName','position']
playerTrainDF_nolabel_quant = playerTrainDF_nolabel.drop(columns=dropcols)
playerTestDF_quant = playerTestDF.drop(columns=dropcols)


############ Tree 1: Gini & Best ################################

# Decision tree using Gini impurity and the 'best' splitter. No depth or
# leaf-count limit is set; random_state is fixed so reruns build the same tree.
model = DecisionTreeClassifier(criterion='gini', splitter='best', max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
                  min_weight_fraction_leaf=0.0,
                  max_features=None, random_state=42,
                  max_leaf_nodes=None, min_impurity_decrease=0.0,
                  class_weight=None, ccp_alpha=0.0)

model.fit(playerTrainDF_nolabel_quant,playerTrainLabel)

# Export the fitted tree as DOT source (out_file=None returns it as a string).
# feature_names is passed as a plain list rather than a pandas Index to avoid
# version-specific parameter validation in scikit-learn.
# NOTE(review): export_graphviz expects class_names in ascending class order
# (i.e. the order of model.classes_) -- confirm that "Good", "Normal", "Bad"
# matches how 'performance' is encoded.
TREE_Vis = tree.export_graphviz(model,
                    out_file=None,
                    feature_names=list(playerTrainDF_nolabel_quant.columns),
                    class_names=["Good", "Normal", "Bad"],
                    filled=True, rounded=True,
                    special_characters=True)

graph = graphviz.Source(TREE_Vis)
# Bare expression: presumably kept for notebook display; it has no effect
# when the file is run as a plain script.
graph

# Evaluate on the held-out test set.
predictions = model.predict(playerTestDF_quant)
conf_matrix = confusion_matrix(playerTestLabel, predictions)
accuracy = accuracy_score(playerTestLabel, predictions)
print(accuracy)


# Tick labels for the confusion-matrix plots (also used by the later trees).
# NOTE(review): confusion_matrix orders rows/columns by sorted label value --
# confirm this hard-coded order matches that ordering.
labels = ['Good','Normal','Bad']
# fmt='d' keeps the integer counts from being shown in scientific notation.
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels, yticklabels=labels, cbar=False)
plt.title("Confusion Matrix",fontsize=20)
# confusion_matrix puts true labels on rows and predictions on columns, and
# heatmap draws rows along the y-axis, so x is "Predicted" and y is "Actual".
plt.xlabel("Predicted", fontsize=15)
plt.ylabel("Actual", fontsize=15)
plt.show()

################## Tree plot 2 Entropy & Best splitter ######################

# Same configuration as the first tree, but splits are scored with entropy
# instead of Gini impurity.
model2 = DecisionTreeClassifier(criterion='entropy',splitter='best',max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
                  min_weight_fraction_leaf=0.0,
                  max_features=None, random_state=42,
                  max_leaf_nodes=None, min_impurity_decrease=0.0,
                  class_weight=None, ccp_alpha=0.0)

model2.fit(playerTrainDF_nolabel_quant,playerTrainLabel)
# Export the fitted tree as DOT source (out_file=None returns it as a string).
# feature_names is passed as a plain list rather than a pandas Index to avoid
# version-specific parameter validation in scikit-learn.
# NOTE(review): export_graphviz expects class_names in ascending class order
# (i.e. the order of model2.classes_) -- confirm that "Good", "Normal", "Bad"
# matches how 'performance' is encoded.
TREE_Vis = tree.export_graphviz(model2,
                    out_file=None,
                    feature_names=list(playerTrainDF_nolabel_quant.columns),
                    class_names=["Good", "Normal", "Bad"],
                    filled=True, rounded=True,
                    special_characters=True)
graph = graphviz.Source(TREE_Vis)
# Bare expression: presumably kept for notebook display; it has no effect
# when the file is run as a plain script.
graph

# Evaluate on the held-out test set.
predictions2 = model2.predict(playerTestDF_quant)
conf_matrix2 = confusion_matrix(playerTestLabel, predictions2)
accuracy2 = accuracy_score(playerTestLabel, predictions2)
print(conf_matrix2)
print(accuracy2)

# `labels` is the tick-label list defined alongside the first tree's plot.
# fmt='d' keeps the integer counts from being shown in scientific notation.
sns.heatmap(conf_matrix2, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels, yticklabels=labels, cbar=False)
plt.title("Confusion Matrix",fontsize=20)
# confusion_matrix puts true labels on rows and predictions on columns, and
# heatmap draws rows along the y-axis, so x is "Predicted" and y is "Actual".
plt.xlabel("Predicted", fontsize=15)
plt.ylabel("Actual", fontsize=15)
plt.show()

######################### Tree Plot 3 Gini and random splitter #########################

# Same configuration as the first tree, but with the 'random' splitter
# instead of 'best'; random_state is fixed so reruns build the same tree.
model3 = DecisionTreeClassifier(criterion='gini',splitter='random',max_depth=None,
                  min_samples_split=2, min_samples_leaf=1,
                  min_weight_fraction_leaf=0.0,
                  max_features=None, random_state=42,
                  max_leaf_nodes=None, min_impurity_decrease=0.0,
                  class_weight=None, ccp_alpha=0.0)
model3.fit(playerTrainDF_nolabel_quant,playerTrainLabel)
# Export the fitted tree as DOT source (out_file=None returns it as a string).
# feature_names is passed as a plain list rather than a pandas Index to avoid
# version-specific parameter validation in scikit-learn.
# NOTE(review): export_graphviz expects class_names in ascending class order
# (i.e. the order of model3.classes_) -- confirm that "Good", "Normal", "Bad"
# matches how 'performance' is encoded.
TREE_Vis = tree.export_graphviz(model3,
                    out_file=None,
                    feature_names=list(playerTrainDF_nolabel_quant.columns),
                    class_names=["Good", "Normal", "Bad"],
                    filled=True, rounded=True,
                    special_characters=True)
graph = graphviz.Source(TREE_Vis)
# Bare expression: presumably kept for notebook display; it has no effect
# when the file is run as a plain script.
graph

# Evaluate on the held-out test set.
predictions3 = model3.predict(playerTestDF_quant)
conf_matrix3 = confusion_matrix(playerTestLabel, predictions3)
accuracy3 = accuracy_score(playerTestLabel, predictions3)
print(conf_matrix3)
print(accuracy3)

# `labels` is the tick-label list defined alongside the first tree's plot.
# fmt='d' keeps the integer counts from being shown in scientific notation.
sns.heatmap(conf_matrix3, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels, yticklabels=labels, cbar=False)
plt.title("Confusion Matrix",fontsize=20)
# confusion_matrix puts true labels on rows and predictions on columns, and
# heatmap draws rows along the y-axis, so x is "Predicted" and y is "Actual".
plt.xlabel("Predicted", fontsize=15)
plt.ylabel("Actual", fontsize=15)
plt.show()

  TOC